import re
import requests

baidu_url = "https://news.baidu.com/"

# Send a desktop-browser User-Agent with the request instead of the default
# one supplied by the requests library.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
}
# requests applies no timeout by default, so without one a stalled
# connection would block this script indefinitely.
response = requests.get(baidu_url, headers=header, timeout=10)
# Fail loudly on a 4xx/5xx response rather than continuing with an error page.
response.raise_for_status()
# Decode the raw response bytes explicitly as UTF-8.
data = response.content.decode('utf-8')

# Parse the page with a regular expression.
# Each match captures a link's URL, its `mon` attribute and its text (title).

# Sample anchor tag (presumably copied from the fetched page); not used below.
# NOTE(review): this sample has class="a3" between `target` and `mon`, so the
# pattern below does not match it -- confirm whether that is intended.
test_data = '<a href="https://wap.peopleapp.com/article/4984657/4876718" target="_blank" class="a3" mon="ct=1&amp;a=1&amp;c=top&amp;pn=1">2020新年贺词 习近平和大家谈"心"</a>'
pattern = re.compile('<a href="(.*?)" target="_blank" mon="(.*?)">(.*?)</a>')
result = pattern.findall(data)
# Skip the first two and last two matches; print each remaining URL
# immediately followed by its link text (no separator).
for link, _mon, title in result[2:-2]:
    print(f"{link}{title}")


# Debug aid (disabled): uncomment to save the fetched page to 02.html for inspection.
# with open('02.html',mode='w',encoding='utf-8') as file:
#     file.write(data)
